This file is for exploratory research for Northwestern University’s City of Evanston Open Data Contest: https://sites.northwestern.edu/lovedataweek/contest/

Deadline: Monday, April 1 @ 3:00 PM

## Libraries
library(ggplot2)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.0.0     ✔ purrr   0.2.5
## ✔ tidyr   0.8.2     ✔ dplyr   0.7.8
## ✔ readr   1.3.0     ✔ stringr 1.3.1
## ✔ tibble  2.0.0     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(leaflet)
library(leaflet.extras)
library(htmlwidgets)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.3-6, (SVN revision 773)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
##  Linking to sp version: 1.3-1
theme_set(theme_linedraw())

Traffic Stops 2017-2018

# Download the traffic-stop table from the Evanston open-data portal; the
# query string asks for at most 35,000 rows.
traffic_data <- read.csv(
  file = "https://data.cityofevanston.org/resource/bbbk-a8vu.csv?$limit=35000"
)

# Overwrite the level labels of `day_of_the_week` with three-letter day names.
# NOTE(review): this assignment relabels whatever levels already exist, by
# position; it does not reorder them. If the column's levels are not already
# in Mon..Sun order (e.g. day names sorted alphabetically), the labels end up
# on the wrong days. Confirm against the raw column.
levels(traffic_data$day_of_the_week) <-
  c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")

Stops by Day of Week/Race

# Bar chart of stop counts per day of the week, one panel per race.
ggplot(data = traffic_data, mapping = aes(x = day_of_the_week)) +
  geom_bar() +
  facet_wrap(facets = ~race)

#### Stops by Race/Beat

# Bar chart of stop counts per race, one panel per police beat.
ggplot(data = traffic_data, mapping = aes(x = race)) +
  geom_bar() +
  facet_wrap(facets = ~police_beat)

## Evanston Arrests

# Download the arrests table from the Evanston open-data portal.
# NOTE(review): unlike the traffic-stop and police-activity reads in this
# file, this URL carries no `$limit`; confirm the endpoint's default page
# size returns every row.
arrest_data <- read.csv("https://data.cityofevanston.org/resource/jwmg-4gbx.csv")

# Work on a character copy of `location`. Blank locations are turned into NA
# first so that drop_na() really removes them: an empty string is not NA, so
# dropping on the raw text kept those rows and separate() later warned about
# missing pieces.
arrest_data <- arrest_data %>%
  mutate(loc = na_if(as.character(location), "")) %>%
  drop_na(loc) %>%
  # Keep characters 8 through second-to-last, i.e. strip a 7-character prefix
  # and the final character (presumably "POINT (" and ")" - confirm against
  # the raw data), leaving "<lon> <lat>".
  mutate(loc = str_sub(loc, 8, -2))

# Split the remaining text on the space into numeric `lon` and `lat` columns,
# then discard any row whose latitude still could not be parsed.
arrest_data_lonlat <- arrest_data %>%
  separate(col = loc, into = c("lon", "lat"), sep = " ") %>%
  mutate(lon = as.numeric(lon), lat = as.numeric(lat)) %>%
  drop_na(lat)

Mapping Arrests in Evanston

This is a map of arrests made in Evanston.

# Interactive map with one circle marker per arrest; a marker's popup shows
# the arrest type.
arrest_data_lonlat %>%
  leaflet() %>%
  addCircleMarkers(lng = ~lon, lat = ~lat, popup = ~arrest_type) %>%
  addProviderTiles(provider = providers$CartoDB.Positron)

This is a heatmap of arrests made in Evanston.

# Heatmap layer built from the arrest coordinates, drawn on a dark basemap.
arrest_data_lonlat %>%
  leaflet() %>%
  addProviderTiles(provider = providers$CartoDB.DarkMatter) %>%
  addHeatmap(
    lat = ~lat,
    lng = ~lon,
    radius = 15,
    blur = 30,
    max = 0.05
  )

All Evanston Police Activity

# Download the police-activity table from the Evanston open-data portal; the
# query string asks for at most 200,000 rows.
pdata <- read.csv(
  file = "https://data.cityofevanston.org/resource/t5g5-xgby.csv?$limit=200000"
)

# Derive numeric `lon` / `lat` columns from the textual `location` column:
#   1. store `location` as character and drop rows where it is blank;
#   2. copy characters 8 through second-to-last into `loc` (strips a
#      7-character prefix and the final character - presumably "POINT (" and
#      ")", confirm against the raw data);
#   3. split `loc` on the space and convert both pieces to numbers.
pdata <- pdata %>%
  mutate(location = as.character(location)) %>%
  filter(location != "") %>%
  mutate(loc = str_sub(location, start = 8, end = -2)) %>%
  separate(col = loc, into = c("lon", "lat"), sep = " ") %>%
  mutate(lon = as.numeric(lon), lat = as.numeric(lat))

This is a heatmap of all police activity in Evanston.

# Heatmap layer built from every police-activity coordinate, on a dark
# basemap.
leaflet(data = pdata) %>%
  addProviderTiles(provider = providers$CartoDB.DarkMatter) %>%
  addHeatmap(lat = ~lat, lng = ~lon, radius = 8, blur = 20)

Tree Information

## Read in Data
# The tree inventory is read from a CSV file in the working directory.
tdata <- read.csv(file = "Trees_data.csv")

## Set Geo
# Derive numeric `lon` / `lat` columns from the textual `the_geom` column:
#   1. copy `the_geom` into a character `location` column and drop rows where
#      it is blank;
#   2. copy characters 8 through second-to-last into `loc` (strips a
#      7-character prefix and the final character - presumably "POINT (" and
#      ")", confirm against the raw data);
#   3. split `loc` on the space and convert both pieces to numbers.
tdata <- tdata %>%
  mutate(location = as.character(the_geom)) %>%
  filter(location != "") %>%
  mutate(loc = str_sub(location, start = 8, end = -2)) %>%
  separate(col = loc, into = c("lon", "lat"), sep = " ") %>%
  mutate(lon = as.numeric(lon), lat = as.numeric(lat))

Types of Trees

# Print the 30 most common tree species with their counts, most frequent
# first. count() is given the column directly, so the printed table is an
# ordinary ungrouped tibble instead of one left grouped by Species.
tdata %>%
  count(Species, sort = TRUE) %>%
  head(n = 30)

Stop and Frisk

Data from https://data.cityofevanston.org/browse?q=Stop%20and%20Frisk&sortBy=relevance

# The stop-and-frisk table is read from a CSV file in the working directory.
sfdata <- read.csv(file = "Stop_and_Frisk.csv")

By Race

# Horizontal bar chart of the number of stop-and-frisk records per race.
# geom_bar() does the counting itself, so no dplyr grouping is needed before
# plotting.
sfdata %>%
  ggplot(aes(Race)) +
  geom_bar() +
  coord_flip() +
  # Make the count axis reach at least 1300. expand_limits() only widens the
  # axis; hard scale limits would silently drop any bar taller than 1300.
  expand_limits(y = 1300) +
  scale_y_continuous(expand = c(0, 0))

By Reason

# Horizontal bar chart of record counts per stop reason. The reason factor is
# ordered by frequency and then reversed before plotting.
sfdata %>%
  mutate(
    Reason.Code.Description = Reason.Code.Description %>%
      fct_infreq() %>%
      fct_rev()
  ) %>%
  ggplot(mapping = aes(x = Reason.Code.Description)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip()

# Horizontal bar chart of record counts per race, with one panel per
# enforcement outcome. geom_bar() does the counting itself, so no dplyr
# grouping is needed before plotting.
sfdata %>%
  ggplot(aes(Race)) +
  geom_bar() +
  coord_flip() +
  facet_grid(~Enforcement) +
  scale_y_continuous(expand = c(0, 0))

# Violin plot of age by race, restricted to records whose reason description
# contains "STREET".
sfdata %>%
  filter(grepl(pattern = "STREET", x = Reason.Code.Description)) %>%
  ggplot(mapping = aes(x = Race, y = Age)) +
  geom_violin()
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

# For each stop reason, count records per enforcement outcome and convert the
# counts to shares. summarise() drops the innermost grouping (Enforcement), so
# the shares are computed within each reason. Only the ARREST rows are kept,
# and up to 15 rows are printed.
sfdata %>%
  group_by(Reason.Code.Description, Enforcement) %>%
  summarise(n = n()) %>%
  mutate(freq = prop.table(n)) %>%
  filter(Enforcement %in% "ARREST") %>%
  print(n = 15)
## # A tibble: 15 x 4
## # Groups:   Reason.Code.Description [15]
##    Reason.Code.Description           Enforcement     n   freq
##    <fct>                             <fct>       <int>  <dbl>
##  1 GANG NARC/RSO                     ARREST          4 0.571 
##  2 GANG/NARCOTIC                     ARREST         11 0.275 
##  3 OTHER                             ARREST         22 0.186 
##  4 STREET STOP                       ARREST         35 0.145 
##  5 STREET STOP/GANG NARC             ARREST          7 0.115 
##  6 STREET STOP/RSO                   ARREST          1 0.333 
##  7 STREET STOP/SUSP PERSON           ARREST          9 0.0744
##  8 STREET STOP/SUSP PERSON/GANG NARC ARREST          3 0.115 
##  9 SUSP PERSON/GANG NARC             ARREST          3 0.273 
## 10 SUSPICIOUS PERSON                 ARREST          2 0.0426
## 11 TRAF STOP/GANG NARC               ARREST         30 0.0804
## 12 TRAF STOP/GANG NARC/RSO           ARREST          1 0.0769
## 13 TRAF STOP/SUSP PERS/GANG NARC     ARREST          2 0.4   
## 14 TRAF STOP/SUSP PERSON/GANG/RSO    ARREST          4 0.0816
## 15 TRAFFIC STOP                      ARREST         78 0.138
# For each race, count records per enforcement outcome and convert the counts
# to shares. summarise() drops the innermost grouping (Enforcement), so the
# shares are computed within each race. Only the ARREST rows are kept.
sfdata %>%
  group_by(Race, Enforcement) %>%
  summarise(n = n()) %>%
  mutate(freq = prop.table(n)) %>%
  filter(Enforcement %in% "ARREST")

Evanston Population Overall

# Download the population table from the Evanston open-data portal.
# NOTE(review): this URL carries no `$limit`; confirm the endpoint's default
# page size returns every row.
pop <- read.csv(file = "https://data.cityofevanston.org/resource/j228-4hwk.csv")

Evanston Salary Information

# Download the salary table from the Evanston open-data portal.
# NOTE(review): this URL carries no `$limit`; confirm the endpoint's default
# page size returns every row.
saldata <- read.csv(file = "https://data.cityofevanston.org/resource/qehy-v72q.csv")

# Print the table sorted by `earnings_amount`, largest first.
arrange(saldata, desc(earnings_amount))

Evanston Restaurant Info

## Food Establishment Businesses with Inspections 2018-2019
# NOTE(review): this URL carries no `$limit`; confirm the endpoint's default
# page size returns every row.
restdata <- read.csv(file = "https://data.cityofevanston.org/resource/x9f3-wjav.csv")

### Get Lat and Lon from Point
# Derive numeric `lon` / `lat` columns from the textual `location` column:
#   1. store `location` as character and drop rows where it is blank;
#   2. copy characters 8 through second-to-last into `loc` (strips a
#      7-character prefix and the final character - presumably "POINT (" and
#      ")", confirm against the raw data);
#   3. split `loc` on the space and convert both pieces to numbers.
restdata <- restdata %>%
  mutate(location = as.character(location)) %>%
  filter(location != "") %>%
  mutate(loc = str_sub(location, start = 8, end = -2)) %>%
  separate(col = loc, into = c("lon", "lat"), sep = " ") %>%
  mutate(lon = as.numeric(lon), lat = as.numeric(lat))

## Food Establishment Violations
# NOTE(review): neither URL below carries a `$limit`; confirm the endpoint's
# default page size returns every row.
viodata <- read.csv(file = "https://data.cityofevanston.org/resource/rgdf-vw4g.csv")

## Food Establishment Inspections
inspdata <- read.csv(file = "https://data.cityofevanston.org/resource/dj7x-d2cq.csv")

# Convert `inspection_date` to Date, going through character first so the
# value's text (not an internal factor code) is what gets parsed.
inspdata$inspection_date <- inspdata$inspection_date %>%
  as.character() %>%
  as.Date()

Join data

# Attach inspection records to restaurants. No key is given, so the join uses
# the columns the two tables share (the recorded message below reports
# `business_license`).
jresdata <- inner_join(x = restdata, y = inspdata)
## Joining, by = "business_license"
## Warning: Column `business_license` joining factors with different levels,
## coercing to character vector

# Preview, for each business licence, the row with the latest inspection date.
# The result is only printed; `jresdata` itself still holds every joined row.
group_by(jresdata, business_license) %>%
  slice(which.max(inspection_date))

Map most recent scores

# Keep only the latest inspection for each business licence so that every
# restaurant is drawn once, coloured by its most recent score. Plotting
# `jresdata` directly would stack one marker per past inspection.
latest_insp <- jresdata %>%
  group_by(business_license) %>%
  slice(which.max(inspection_date)) %>%
  ungroup()

# Colour scale for inspection scores; the domain spans every joined
# inspection, so it covers all scores that can appear on the map.
pal <- colorNumeric("RdYlBu", domain = jresdata$inspection_score)

# One borderless circle per restaurant; the popup shows the business name.
leaflet(latest_insp) %>%
  addCircleMarkers(
    lng = ~lon,
    lat = ~lat,
    popup = ~business_name,
    stroke = FALSE,
    fillColor = ~pal(inspection_score)
  ) %>%
  addProviderTiles(providers$CartoDB.Positron)